Preparations

Load the necessary libraries

library(gbm)         #for gradient boosted models
library(car)
library(dismo)
library(pdp)
library(ggfortify)
library(randomForest)
library(tidyverse)
library(patchwork)
library(stars)

Scenario

Leathwick et al. (2008) compiled capture data of short-finned eels (_Anguilla australis) within New Zealand freshwater streams to explore the distribution of the eels for conservation purposes. The goal was to be able to model the presence/absence of the eels against a range of environmental characteristics so as to predict their more broader occurances and identify which predictors are the most important in the predictions.

eel

Format of leathwick.csv data file

Site Angaus SegSumT SegTSeas SegLowFlow
1 0 16.0 -0.10 1.036
2 1 18.7 1.51 1.003
3 0 18.3 0.37 1.001
4 0 16.7 -3.80 1.000
5 1 17.2 0.33 1.005
6 0 15.1 1.83 1.015
.. .. .. .. ..
Site Unique label for each site.
Angaus Presence (1) or absence (0) of Anguilla australis eels
SegSumT Summer air temperature (degrees celcius) at the river segment
scale
SegTSeas Winter temperature normalised to January temperature at the river
segment scale
SegLowFlow Forth root transformed low flow rate at the river segment scale
DSDist Distance to coast (km) (a downstream predictor)
DSDam Presence of known downsteam obstructions (a downstream predictor)
DSMaxSlope Maximum downstream slope (a downstream predictor)
USAvgT Upstream average tempeture (normalised for the river segment)
USRainDays Number of rainy days recorded in the upstream catchment
USSlope Slope of the river upstream
USNative Percentage of the upstream riparian vegetation that is native
Method Method used to capture the eels (categorical predictor)
LocSed Weighted average of the proportional cover of bed sediment
(1=mud, 2=sand, 3=fine gravel, 4=course gravel, 5=cobble, 6=boulder, 7=bedrock)

Read in the data

leathwick = read_csv('../public/data/leathwick.csv', trim_ws=TRUE)
glimpse(leathwick)
## Rows: 1,000
## Columns: 14
## $ Site       <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, …
## $ Angaus     <dbl> 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0,…
## $ SegSumT    <dbl> 16.0, 18.7, 18.3, 16.7, 17.2, 15.1, 12.7, 18.1, 18.9, 18.2,…
## $ SegTSeas   <dbl> -0.10, 1.51, 0.37, -3.80, 0.33, 1.83, 2.17, 1.00, 1.59, 0.7…
## $ SegLowFlow <dbl> 1.036, 1.003, 1.001, 1.000, 1.005, 1.015, 1.001, 1.002, 1.0…
## $ DSDist     <dbl> 50.2000, 132.5300, 107.4400, 166.8200, 3.9500, 11.1700, 42.…
## $ DSMaxSlope <dbl> 0.57, 1.15, 0.57, 1.72, 1.15, 1.72, 2.86, 2.29, 0.40, 3.43,…
## $ USAvgT     <dbl> 0.09, 0.20, 0.49, 0.90, -1.20, -0.20, 1.45, 0.47, 0.25, 0.0…
## $ USRainDays <dbl> 2.470, 1.153, 0.847, 0.210, 1.980, 3.300, 0.430, 1.153, 0.8…
## $ USSlope    <dbl> 9.8, 8.3, 0.4, 0.4, 21.9, 25.7, 9.6, 4.9, 9.8, 20.5, 3.9, 6…
## $ USNative   <dbl> 0.81, 0.34, 0.00, 0.22, 0.96, 1.00, 0.09, 0.02, 0.74, 0.92,…
## $ DSDam      <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0,…
## $ Method     <chr> "electric", "electric", "spo", "electric", "electric", "ele…
## $ LocSed     <dbl> 4.8, 2.0, 1.0, 4.0, 4.7, 4.5, 4.3, NA, NA, 3.6, 3.7, 1.0, 3…
leathwick <- leathwick %>%
    mutate(Method=factor(Method),
           LocSed = as.numeric(LocSed)) %>% 
    ## LocSed=factor(LocSed)) %>%
    as.data.frame
leathwick_test = read_csv('../public/data/leathwick_test.csv', trim_ws=TRUE)
glimpse(leathwick_test)
## Rows: 500
## Columns: 13
## $ Angaus_obs <dbl> 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0,…
## $ SegSumT    <dbl> 16.6, 16.8, 16.3, 15.6, 14.6, 16.7, 18.3, 16.4, 17.2, 15.7,…
## $ SegTSeas   <dbl> 1.01, -0.51, 0.76, 1.56, -0.20, 0.80, 0.67, 1.44, -0.67, -0…
## $ SegLowFlow <dbl> 1.017, 1.002, 1.023, 1.003, 1.023, 1.003, 1.005, 1.031, 1.0…
## $ DSDist     <dbl> 5.2300, 2.2400, 162.2800, 4.0500, 127.0300, 2.4800, 94.0770…
## $ DSMaxSlope <dbl> 0.29, 0.00, 5.14, 0.57, 1.72, 14.57, 0.57, 1.72, 1.72, 2.86…
## $ USAvgT     <dbl> -1.40, 0.27, -0.60, 1.14, -1.90, -1.50, 0.54, 0.75, -1.80, …
## $ USRainDays <dbl> 1.980, 0.460, 0.806, 3.300, 1.940, 1.980, 0.847, 2.249, 0.5…
## $ USSlope    <dbl> 10.0, 0.7, 21.4, 0.9, 28.9, 22.0, 1.0, 4.8, 26.4, 26.1, 23.…
## $ USNative   <dbl> 1.00, 0.00, 0.66, 0.75, 0.97, 0.99, 0.01, 0.02, 0.74, 0.99,…
## $ DSDam      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0,…
## $ Method     <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ LocSed     <dbl> 4.9, 2.3, 4.3, 1.0, NA, 2.8, NA, NA, 4.2, 4.1, 4.3, 1.2, NA…
leathwick_test = leathwick_test %>%
    mutate(Method=factor(Method), 
           LocSed=as.numeric(LocSed)) %>%
  as.data.frame()

Exploratory data analysis

scatterplotMatrix(~Angaus+SegSumT+SegTSeas+SegLowFlow+DSDist+DSMaxSlope+DSDam+
                    USAvgT+USRainDays+USSlope+USNative+Method+LocSed,  data=leathwick,
                  diagonal=list(method='boxplot'))

Fit the model

Partial plots

leathwick.gbm %>%
    pdp::partial(pred.var='SegSumT',
                 n.trees=best.iter,
                 inv.link=plogis,
                 recursive=FALSE,
                 type='regression') %>%
    autoplot() +
    ylim(0,1)

nms <- colnames(leathwick)
p <- vector('list', 12)
names(p) <- nms[3:14]
for (nm in nms[3:14]) {
  print(nm)
  p[[nm]] <- leathwick.gbm %>% pdp::partial(pred.var=nm,
                                 n.trees=best.iter,
                                 inv.link=plogis,
                                 recursive=FALSE,
                                 type='regression') %>%
    autoplot() + ylim(0, 1)
}
## [1] "SegSumT"
## [1] "SegTSeas"
## [1] "SegLowFlow"
## [1] "DSDist"
## [1] "DSMaxSlope"
## [1] "USAvgT"
## [1] "USRainDays"
## [1] "USSlope"
## [1] "USNative"
## [1] "DSDam"
## [1] "Method"
## [1] "LocSed"
do.call('grid.arrange', p)

Assessing accuracy

Plot spatial distribution of eels

data(Anguilla_grids)
leathwick.grid = Anguilla_grids
glimpse(leathwick.grid)
## Formal class 'RasterBrick' [package "raster"] with 12 slots
##   ..@ file    :Formal class '.RasterFile' [package "raster"] with 13 slots
##   ..@ data    :Formal class '.MultipleRasterData' [package "raster"] with 14 slots
##   ..@ legend  :Formal class '.RasterLegend' [package "raster"] with 5 slots
##   ..@ title   : chr(0) 
##   ..@ extent  :Formal class 'Extent' [package "raster"] with 4 slots
##   ..@ rotated : logi FALSE
##   ..@ rotation:Formal class '.Rotation' [package "raster"] with 2 slots
##   ..@ ncols   : int 250
##   ..@ nrows   : int 196
##   ..@ crs     :Formal class 'CRS' [package "sp"] with 1 slot
##   ..@ history : list()
##   ..@ z       : list()
##   ..$ layernames: chr [1:11] "DSDam" "DSDist" "DSMaxSlope" "LocSed" ...
plot(leathwick.grid)

Method <- factor('electric', levels=levels(leathwick$Method))
Method = as.data.frame(Method)

fit <- predict(leathwick.grid, leathwick.gbm,  const=Method,
               n.trees=best.iter,  type='response')
#fit <- mask(fit,  raster(leathwick.grid, 1))
library(stars)
fit= stars::st_as_stars(fit)


ggplot() +
  geom_stars(data=fit) +
  scale_fill_gradient(low='red', high='green', 'Probability\nof occurrance', na.value=NA) +
  coord_sf(expand=FALSE) +
  theme_bw()

References

Leathwick, J. R., J. Elith, W. L. Chadderton, D. Rowe, and T. Hastie. 2008. “Dispersal, Disturbance and the Contrasting Biogeographies of New Zealand’s Diadromous and Non-Diadromous Fish Species.” Journal of Biogeography 35 (8): 1481–97. https://doi.org/https://doi.org/10.1111/j.1365-2699.2008.01887.x.